1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
use crate::sketchbook::observations::{Dataset, Observation, ObservationManager, VarValue};
use std::fs::File;
use std::str::FromStr;

impl ObservationManager {
    /// Load a dataset from the given CSV file.
    ///
    /// The header line specifies the variables; every following line represents one
    /// observation (its ID followed by its values). The first header column only labels
    /// the ID column and is not treated as a variable.
    ///
    /// The resulting dataset has empty annotation string (same for all its observations).
    ///
    /// For example, the following might be a valid CSV file for a dataset with 2 observations:
    ///
    /// ```text
    /// ID,YOX1,CLN3,YHP1,ACE2,SWI5,MBF
    /// Observation1,0,1,0,1,0,1
    /// Observation2,1,0,*,1,0,*
    /// ```
    ///
    /// # Errors
    ///
    /// Returns an error message if:
    /// - the file at `csv_path` cannot be opened,
    /// - the CSV reader fails to read the header or any record,
    /// - a record contains no fields at all,
    /// - a value cannot be parsed by [`VarValue::from_str`],
    /// - [`Observation::new`] or [`Dataset::new`] rejects the parsed data.
    pub fn load_dataset(name: &str, csv_path: &str) -> Result<Dataset, String> {
        let csv_file = File::open(csv_path).map_err(|e| e.to_string())?;
        let mut reader = csv::Reader::from_reader(csv_file);

        // `headers()` borrows the reader mutably, so keep an owned copy of the header record;
        // this leaves the reader free for iterating over the records below.
        let header = reader.headers().map_err(|e| e.to_string())?.clone();
        // Skip the first column (label of the ID column); the rest are variable names.
        let variables: Vec<&str> = header.iter().skip(1).collect();

        // Parse all remaining rows as observations.
        let mut observations = Vec::new();
        for result in reader.records() {
            let record = result.map_err(|e| e.to_string())?;
            let mut fields = record.iter();
            // The first field is the observation ID; a record without it cannot be imported.
            let id = fields
                .next()
                .ok_or_else(|| "Cannot import empty observation.".to_string())?;
            // All remaining fields are the values; stop at the first one that fails to parse.
            let values = fields
                .map(VarValue::from_str)
                .collect::<Result<Vec<VarValue>, String>>()?;
            observations.push(Observation::new(values, id)?);
        }
        Dataset::new(name, observations, variables)
    }

    /// Load a dataset from the given CSV file, and add it to this `ObservationManager`.
    ///
    /// The header line specifies variables, following lines represent individual observations
    /// (id and values). The given `id` is used both as the name of the loaded dataset and as
    /// the ID string under which it is added.
    ///
    /// Note that the argument order (`csv_path`, then `id`) differs from
    /// [`Self::load_dataset`] (`name`, then `csv_path`).
    ///
    /// See [`Self::load_dataset`] for details on the expected file format.
    ///
    /// # Errors
    ///
    /// Returns an error message if loading the dataset fails, or if adding it to the manager
    /// via `add_dataset_by_str` fails.
    pub fn load_and_add_dataset(&mut self, csv_path: &str, id: &str) -> Result<(), String> {
        // use same name as ID
        let dataset = Self::load_dataset(id, csv_path)?;
        self.add_dataset_by_str(id, dataset)
    }
}